# Build the analysis table: keep only the columns the charts use, drop rows
# missing a borough, inspection date, coordinates, or grade, and convert the
# two date columns to Date.
#
# ymd() failed to parse every value of these columns ("All formats failed to
# parse"), which left them entirely NA. parse_date_time() tries several
# candidate layouts instead, and as_date() keeps the result a Date, which is
# the type ymd() would have returned.
# NOTE(review): the raw columns are presumably month/day/year text (the usual
# NYC Open Data export layout) — confirm against the source file and trim the
# `orders` vector to the single format that applies.
cleaned_data <- nyc_inspections %>%
  select(
    boro, inspection_date, critical_flag,
    latitude, longitude, grade, grade_date
  ) %>%
  filter(
    !is.na(boro),
    !is.na(inspection_date),
    !is.na(latitude),
    !is.na(longitude),
    !is.na(grade)
  ) %>%
  mutate(
    across(
      c(inspection_date, grade_date),
      ~ as_date(
        parse_date_time(.x, orders = c("mdy", "ymd", "mdy HMS", "ymd HMS"))
      )
    )
  )
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `inspection_date = ymd(inspection_date)`.
## Caused by warning:
## ! All formats failed to parse. No formats found.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
# Number of inspection records for every borough/grade combination.
# count() is the one-step equivalent of group_by() + summarise(n()) +
# ungroup(): it returns an ungrouped table and does not emit the
# "summarise() has grouped output" message that the `.groups`-less version
# produced. `name = "count"` keeps the column name the bar chart reads.
grade_boro_count <- cleaned_data %>%
  count(boro, grade, name = "count")
## `summarise()` has grouped output by 'boro'. You can override using the
## `.groups` argument.
Column
Chart A
# Chart A: bar chart of the per-borough grade counts, one coloured bar
# series per grade.
grade_boro_count %>%
  plot_ly(
    x = ~boro,
    y = ~count,
    color = ~grade,
    type = "bar"
  ) %>%
  layout(
    title = "Distribution of Inspection Grades by Borough",
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Count")
  )
Column
Chart B
# Chart B: one marker per cleaned inspection record, positioned by
# longitude/latitude and coloured by grade. The hover text shows the grade
# and the borough on separate lines ("<br>" is an HTML line break).
cleaned_data %>%
  plot_ly(
    x = ~longitude,
    y = ~latitude,
    type = "scatter",
    mode = "markers",
    color = ~grade,
    text = ~paste("Grade:", grade, "<br>Borough:", boro)
  ) %>%
  layout(
    title = "Inspection Locations by Grade",
    xaxis = list(title = "Longitude"),
    yaxis = list(title = "Latitude")
  )
Chart C
# Chart C: how grades are distributed within each critical-violation flag.
# A box plot needs a numeric y variable, but `grade` is used as a categorical
# grouping in the other charts (colour / hover label), so the distribution is
# shown as counts per flag and grade in a bar chart, mirroring Chart A.
# NOTE(review): assumes `grade` holds letter categories rather than numeric
# scores — confirm; if it is numeric, a box plot of it would be appropriate.
cleaned_data %>%
  count(critical_flag, grade, name = "count") %>%
  plot_ly(
    x = ~critical_flag,
    y = ~count,
    color = ~grade,
    type = "bar"
  ) %>%
  layout(
    title = "Distribution of Grades by Critical Violation Flag",
    xaxis = list(title = "Critical Flag"),
    yaxis = list(title = "Count")
  )